/**
 * DataCleaner (community edition)
 * Copyright (C) 2014 Free Software Foundation, Inc.
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA  02110-1301  USA
 */
package org.datacleaner.test.full.scenarios;

import java.util.Arrays;
import java.util.List;

import org.apache.metamodel.DataContext;
import org.apache.metamodel.schema.Column;
import org.apache.metamodel.schema.Table;
import org.datacleaner.api.AnalyzerResult;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.beans.StringAnalyzer;
import org.datacleaner.beans.standardize.EmailStandardizerTransformer;
import org.datacleaner.beans.stringpattern.PatternFinderAnalyzer;
import org.datacleaner.beans.stringpattern.PatternFinderResult;
import org.datacleaner.beans.stringpattern.PatternFinderResultTextRenderer;
import org.datacleaner.configuration.DataCleanerConfiguration;
import org.datacleaner.configuration.DataCleanerConfigurationImpl;
import org.datacleaner.configuration.DataCleanerEnvironment;
import org.datacleaner.configuration.DataCleanerEnvironmentImpl;
import org.datacleaner.connection.Datastore;
import org.datacleaner.connection.DatastoreConnection;
import org.datacleaner.job.builder.AnalysisJobBuilder;
import org.datacleaner.job.builder.AnalyzerComponentBuilder;
import org.datacleaner.job.builder.TransformerComponentBuilder;
import org.datacleaner.job.concurrent.MultiThreadedTaskRunner;
import org.datacleaner.job.concurrent.TaskRunner;
import org.datacleaner.job.runner.AnalysisResultFuture;
import org.datacleaner.job.runner.AnalysisRunnerImpl;
import org.datacleaner.result.AnnotatedRowsResult;
import org.datacleaner.result.CrosstabResult;
import org.datacleaner.result.DefaultResultProducer;
import org.datacleaner.result.ResultProducer;
import org.datacleaner.result.renderer.CrosstabTextRenderer;
import org.datacleaner.test.TestHelper;

import junit.framework.TestCase;

/**
 * Full-scenario test that runs a {@link PatternFinderAnalyzer} and a
 * {@link StringAnalyzer} in a single job against the EMPLOYEES table of the
 * sample database, and then verifies that the results of both analyzers can be
 * "drilled to detail", i.e. that exploring a crosstab cell yields an
 * {@link AnnotatedRowsResult} with the expected rows.
 */
@SuppressWarnings("deprecation")
public class PatternFinderAndStringAnalyzerDrillToDetailTest extends TestCase {

    /**
     * Builds and runs the job, then checks the pattern finder result and the
     * string analyzer result.
     *
     * The datastore connection and the task runner are released in
     * {@code finally} blocks so that a failing assertion (or a failing job)
     * does not leave the connection open or the task runner running.
     *
     * @throws Throwable
     *             the first error reported by the analysis job if it was not
     *             successful, or any assertion failure
     */
    public void testScenario() throws Throwable {
        final TaskRunner taskRunner = new MultiThreadedTaskRunner(5);
        try {
            final DataCleanerEnvironment environment = new DataCleanerEnvironmentImpl().withTaskRunner(taskRunner);
            final DataCleanerConfiguration configuration =
                    new DataCleanerConfigurationImpl().withEnvironment(environment);

            final Datastore datastore = TestHelper.createSampleDatabaseDatastore("ds");
            final DatastoreConnection con = datastore.openConnection();
            try {
                final DataContext dc = con.getDataContext();

                try (AnalysisJobBuilder ajb = new AnalysisJobBuilder(configuration)) {
                    ajb.setDatastoreConnection(con);

                    final Table table = dc.getDefaultSchema().getTableByName("EMPLOYEES");
                    assertNotNull(table);

                    final Column jobTitleColumn = table.getColumnByName("JOBTITLE");
                    assertNotNull(jobTitleColumn);

                    final Column emailColumn = table.getColumnByName("EMAIL");
                    assertNotNull(emailColumn);

                    ajb.addSourceColumns(jobTitleColumn, emailColumn);

                    // EMAIL is split by the standardizer; its output columns
                    // are fed to the string analyzer below.
                    final InputColumn<?> emailInputColumn = ajb.getSourceColumnByName("EMAIL");
                    final TransformerComponentBuilder<EmailStandardizerTransformer> emailStd1 =
                            ajb.addTransformer(EmailStandardizerTransformer.class).addInputColumn(emailInputColumn);

                    final AnalyzerComponentBuilder<PatternFinderAnalyzer> pf =
                            ajb.addAnalyzer(PatternFinderAnalyzer.class);
                    final InputColumn<?> jobtitleInputColumn = ajb.getSourceColumnByName("JOBTITLE");
                    pf.addInputColumn(jobtitleInputColumn);
                    pf.getComponentInstance().setDiscriminateTextCase(false);

                    final AnalyzerComponentBuilder<StringAnalyzer> sa = ajb.addAnalyzer(StringAnalyzer.class);
                    sa.addInputColumns(emailInputColumn, emailStd1.getOutputColumnByName("Username"),
                            emailStd1.getOutputColumnByName("Domain"));

                    final AnalysisResultFuture resultFuture =
                            new AnalysisRunnerImpl(configuration).run(ajb.toAnalysisJob());
                    if (!resultFuture.isSuccessful()) {
                        // surface the first job error as the test failure
                        throw resultFuture.getErrors().iterator().next();
                    }

                    assertPatternFinderDrillToDetail(resultFuture, pf, jobtitleInputColumn);
                    assertStringAnalyzerDrillToDetail(resultFuture, sa, emailInputColumn);
                }
            } finally {
                con.close();
            }
        } finally {
            taskRunner.shutdown();
        }
    }

    /**
     * Verifies the rendered pattern finder result and the rows obtained when
     * exploring the "aaaaa aaaaaaaaa" pattern's match count.
     *
     * @param resultFuture
     *            the finished analysis result
     * @param pf
     *            the pattern finder component whose result is inspected
     * @param jobtitleInputColumn
     *            the column from which the drilled-to rows' values are read
     */
    private void assertPatternFinderDrillToDetail(final AnalysisResultFuture resultFuture,
            final AnalyzerComponentBuilder<PatternFinderAnalyzer> pf, final InputColumn<?> jobtitleInputColumn) {
        final int expectedMatchCount = 19;

        final PatternFinderResult result = (PatternFinderResult) resultFuture.getResult(pf.toAnalyzerJob());
        final String resultString = new PatternFinderResultTextRenderer().render(result);
        final String[] resultLines = resultString.split("\n");

        assertEquals(resultString, 5, resultLines.length);

        assertEquals(resultString, "                            Match count Sample      ", resultLines[0]);
        assertTrue(resultString, resultLines[1].startsWith("aaaaa aaaaaaaaa                      19"));

        final ResultProducer resultProducer =
                result.getSingleCrosstab().where("Pattern", "aaaaa aaaaaaaaa").where("Measures", "Match count")
                        .explore();
        assertEquals(DefaultResultProducer.class, resultProducer.getClass());
        final AnalyzerResult drillResult = resultProducer.getResult();
        assertEquals(AnnotatedRowsResult.class, drillResult.getClass());

        final AnnotatedRowsResult annotatedRowsResult = (AnnotatedRowsResult) drillResult;
        assertEquals(expectedMatchCount, annotatedRowsResult.getAnnotatedRowCount());
        final List<InputRow> rows = annotatedRowsResult.getSampleRows();
        assertEquals(expectedMatchCount, rows.size());

        final String[] values = new String[expectedMatchCount];
        for (int i = 0; i < values.length; i++) {
            values[i] = (String) rows.get(i).getValue(jobtitleInputColumn);
        }

        // sort so that the comparison does not depend on the order of the rows
        Arrays.sort(values);

        assertEquals("[Sales Rep, Sales Rep, Sales Rep, Sales Rep, Sales Rep, Sales Rep, Sales Rep, "
                + "Sales Rep, Sales Rep, Sales Rep, Sales Rep, Sales Rep, Sales Rep, Sales Rep, "
                + "Sales Rep, Sales Rep, Sales Rep, VP Marketing, VP Sales]", Arrays.toString(values));
    }

    /**
     * Verifies the rendered string analyzer crosstab and the drill-to-detail
     * results of the "Max chars" measure for the Username and EMAIL columns.
     *
     * @param resultFuture
     *            the finished analysis result
     * @param sa
     *            the string analyzer component whose result is inspected
     * @param emailInputColumn
     *            the column from which the drilled-to row's value is read
     */
    private void assertStringAnalyzerDrillToDetail(final AnalysisResultFuture resultFuture,
            final AnalyzerComponentBuilder<StringAnalyzer> sa, final InputColumn<?> emailInputColumn) {
        final CrosstabResult result = (CrosstabResult) resultFuture.getResult(sa.toAnalyzerJob());
        final String[] resultLines = new CrosstabTextRenderer().render(result).split("\n");

        assertEquals("                                         EMAIL Username   Domain ", resultLines[0]);
        assertEquals("Total char count                           655      172      460 ", resultLines[6]);
        assertEquals("Max chars                                   31       10       20 ", resultLines[7]);
        assertEquals("Min chars                                   26        5       20 ", resultLines[8]);

        // Username is a virtual column, but because of the row-annotation
        // system it is still possible to drill to detail on it.
        final ResultProducer usernameProducer =
                result.getCrosstab().where("Column", "Username").where("Measures", "Max chars").explore();
        assertNotNull(usernameProducer);
        assertEquals(AnnotatedRowsResult.class, usernameProducer.getResult().getClass());

        // EMAIL is a physical column so it IS queryable
        final ResultProducer emailProducer =
                result.getCrosstab().where("Column", "EMAIL").where("Measures", "Max chars").explore();
        assertNotNull(emailProducer);

        final AnalyzerResult drillResult = emailProducer.getResult();
        assertEquals(AnnotatedRowsResult.class, drillResult.getClass());

        final AnnotatedRowsResult annotatedRowsResult = (AnnotatedRowsResult) drillResult;
        final List<InputRow> rows = annotatedRowsResult.getSampleRows();
        assertEquals(1, rows.size());
        assertEquals("wpatterson@classicmodelcars.com", rows.get(0).getValue(emailInputColumn).toString());
    }
}
